Fin most abundant taxa across sites:

sapply(c(biobakery_function, alpha, beta, taxa, norm, heat, varia), source, chdir = TRUE)

Data loading:

input <- list(metpahlan_ps = "~/Documents/GitHub/oral-microbiome/data/processed/metaphlan_ps.RDS",
              motus_ps = "~/Documents/GitHub/oral-microbiome/data/processed/motus/physeq.RDS",
              pathway_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_pw_ps.RDS",
              kegg_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_kegg_ps.RDS",
              l4c_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_l4c_ps.RDS",
              go_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_go_ps.RDS",
              infogo_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_infogo_ps.RDS",
              metac_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_metac_ps.RDS",
              pfam_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_pfam_ps.RDS")
purrr::map(input, 
           readRDS) -> objects

Top taxa based on metaphlan:

Investigate most abundant taxa over sites:

sample_data(objects$metpahlan_ps$physeq)$Site_Health <- paste0(sample_data(objects$metpahlan_ps$physeq)$Oral_Site,
                                                               "_",
                                                               sample_data(objects$metpahlan_ps$physeq)$Health_status)

Overall:

physeq_most_abundant(physeq = objects$metpahlan_ps$physeq,
                     group_var = "Site_Health",
                     ntax = 3) -> most_abundant

most_abundant
## [1] "Actinomyces_oris"            "Corynebacterium_matruchotii"
## [3] "Neisseria_flavescens"        "Rothia_dentocariosa"        
## [5] "Rothia_mucilaginosa"         "Streptococcus_parasanguinis"
## [7] "Streptococcus_salivarius"    "Veillonella_atypica"        
## [9] "Veillonella_parvula"

Heatmap those:

objects$metpahlan_ps$physeq %>%
  transform_sample_counts(function(x) x/sum(x) * 100) %>%
  phyloseq::subset_taxa(Species %in% most_abundant) %>%
  phyloseq_ampvis_heatmap(transform = FALSE,
                          group_by = "Subject",#treatment
                          facet_by = c("Oral_Site", "Health_status", "Subject"),
                          ntax = 100,
                          tax_aggregate = "Species",
                          tax_add = NULL) -> p
## Warning: There are only 9 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 25, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 25,50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.1))  + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) + 
  theme(axis.text.y = element_text(angle = 0,  size = 8))

Saliva & Tongue:

Test bacterial activity are compatible in Saliva vs local site only in health:

objects$metpahlan_ps$physeq %>%
  subset_samples(Health_status == "Healthy control") %>%
  subset_samples(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) -> tmp

tmp %>%
  physeq_most_abundant(physeq = .,
                     group_var = "Oral_Site",
                     ntax = 5) -> ma_saliva_tongue

ma_saliva_tongue
## [1] "Actinomyces_sp_ICM47"        "Neisseria_flavescens"       
## [3] "Prevotella_melaninogenica"   "Rothia_mucilaginosa"        
## [5] "Streptococcus_parasanguinis" "Streptococcus_salivarius"   
## [7] "Veillonella_atypica"

Heatmap those:

tmp %>%
  transform_sample_counts(function(x) x/sum(x) * 100) %>%
  phyloseq::subset_taxa(Species %in% ma_saliva_tongue) %>%
  phyloseq_ampvis_heatmap(transform = FALSE,
                          group_by = "Subject",#treatment
                          facet_by = c("Oral_Site", "Health_status", "Subject"),
                          ntax = 100,
                          tax_aggregate = "Species",
                          tax_add = NULL) -> p
## Warning: There are only 7 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 25, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 25,50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.1))  + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) + 
  theme(axis.text.y = element_text(angle = 0,  size = 8))

Looks good. We want to check their activities among those two sites.

Load pathway abundance table and have a look:

objects$pathway_ps$physeq  %>%
  humann2_species_contribution(meta_data_var = c("Subject", "sample_Sample" ,"Type", "Oral_Site", "Health_status")) %>%
  separate(Feature, into = c("code", "name"), sep = ": ", remove = FALSE) %>%
  dplyr::filter(!is.na(name))  -> pwy
## Warning in speedyseq::psmelt(.): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 720 rows [1029,
## 1903, 1961, 2338, 2461, 2854, 3562, 3891, 4578, 5649, 5690, 5849, 5854, 6607,
## 6843, 7028, 7077, 7309, 7819, 8576, ...].
pwy %>%
  distinct(name, .keep_all = TRUE) %>%
  DT::datatable()

Visualize RNA/DNA pathway expression:

pwy %>%
  dplyr::filter(Health_status == "Healthy control",
                Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  humann2_RNA_DNA_plot(x_plot = "log10(DNA)", 
                       y_plot = "log10(RNA)", 
                       color = "Species", 
                       fill = "Species",
                       shape = "Genus",
                       group = "Species",
                       filter_species = ma_saliva_tongue,
                       facet_formula = "Oral_Site ~ .") -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

‘triangle plots’

plot$plot  + 
  scale_fill_viridis_d() + 
  scale_color_viridis_d()

# plot$plot %>%
#   plotly::ggplotly()

Boxplot of each Species at each site based on overall pathway.

pwy %>%
  dplyr::filter(Health_status == "Healthy control",
                Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  # filter(RNA_DNA > mean(RNA_DNA, na.rm = TRUE)) %>%
  # dplyr::filter(grepl("ose",name)) %>%
  humann2_RNA_DNA_ratio_plot(x_plot = "Species", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = ma_saliva_tongue,
                       facet_formula = ". ~ .",
                       export_legend = TRUE) -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot + ggpubr::rotate()

Test differences among species and between sites:

plot$plot$data %>% 
  ggpubr::compare_means(RNA_DNA ~ Oral_Site,
                      group.by = c("Species"),
                      data = .,
                      method = "wilcox.test",
                      p.adjust.method = "fdr") %>%
  filter(p.adj < 0.05)  %>%
  DT::datatable()

From here we can focus at Strep. parasanguinis and check the pathways expressed by patients in diff/ sites:

pwy %>%
  dplyr::filter(Health_status == "Healthy control",
                Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = "Streptococcus_parasanguinis",
                       facet_formula = " . ~ Health_status",
                       export_legend = TRUE) -> plot
  
plot$legend

plot$plot

Test for signicant differences and same figure

plot$plot$data %>% 
  mutate(log2RNA_DNA = log2(RNA_DNA)) %>%
  ggpubr::compare_means(log2RNA_DNA ~ Oral_Site,
                      group.by = c("Feature"),
                      data = .,
                      # method = "wilcox.test",
                      p.adjust.method = "fdr") %>%
  filter(p.adj < 0.05) -> diff_signif  

diff_signif %>%
  DT::datatable()
diff_signif$Feature
##  [1] "PWY-1042: glycolysis IV"                                    
##  [2] "PWY-4041: &gamma;-glutamyl cycle"                           
##  [3] "PWY-6609: adenine and adenosine salvage III"                
##  [4] "ILEUSYN-PWY: L-isoleucine bios. I"                          
##  [5] "VALSYN-PWY: L-valine bios."                                 
##  [6] "PWY-6386: UDP-N-acetylmuramoyl-pentapeptide bios. II"       
##  [7] "BRANCHED-CHAIN-AA-SYN-PWY: spw of branched amino acid bios."
##  [8] "PWY-5103: L-isoleucine bios. III"                           
##  [9] "PWY-3001: spw of L-isoleucine bios. I"                      
## [10] "THRESYN-PWY: spw of L-threonine bios."                      
## [11] "PWY-6387: UDP-N-acetylmuramoyl-pentapeptide bios. I"        
## [12] "PWY-5686: UMP bios."                                        
## [13] "TRNA-CHARGING-PWY: tRNA charging"
pwy %>%
  dplyr::filter(Health_status == "Healthy control",
                Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_feature = diff_signif$Feature,
                       filter_species = "Streptococcus_parasanguinis",
                       facet_formula = " . ~ Health_status",
                       export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$plot

pwy %>%
  # dplyr::filter(Health_status == "Healthy control",
   dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Health_status", 
                       fill = "Health_status",
                       shape = NULL,
                       filter_species = "Streptococcus_parasanguinis",
                       facet_formula = " . ~ Oral_Site ",
                       export_legend = TRUE) -> plot
  
plot$legend

plot$plot

plot$plot$data %>% 
  mutate(log2RNA_DNA = log2(RNA_DNA)) %>%
  ggpubr::compare_means(log2RNA_DNA ~  Health_status,
                      group.by = c("Oral_Site","Feature"),
                      data = .,
                      # method = "wilcox.test",
                      p.adjust.method = "fdr") %>%
  filter(p.adj < 0.05) -> diff_signif  

diff_signif %>%
  DT::datatable()
diff_signif$Feature
##  [1] "PWY-1042: glycolysis IV"                                    
##  [2] "PWY-5100: pyruvate fermentation to acetate and lactate II"  
##  [3] "PWY-7219: adenosine ribonucleotides de novo bios."          
##  [4] "PWY-6121: 5-aminoimidazole ribonucleotide bios. I"          
##  [5] "PWY-6122: 5-aminoimidazole ribonucleotide bios. II"         
##  [6] "PWY-6277: spw of 5-aminoimidazole ribonucleotide bios."     
##  [7] "BRANCHED-CHAIN-AA-SYN-PWY: spw of branched amino acid bios."
##  [8] "PWY-5103: L-isoleucine bios. III"                           
##  [9] "PWY-6936: seleno-amino acid bios."                          
## [10] "PWY-3001: spw of L-isoleucine bios. I"                      
## [11] "PWY-6386: UDP-N-acetylmuramoyl-pentapeptide bios. II"       
## [12] "TRNA-CHARGING-PWY: tRNA charging"                           
## [13] "THRESYN-PWY: spw of L-threonine bios."                      
## [14] "PWY-6387: UDP-N-acetylmuramoyl-pentapeptide bios. I"        
## [15] "PWY-6123: inosine-5'-phosphate bios. I"
pwy %>%
  # dplyr::filter(Health_status == "Healthy control",
     # dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
   dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>% 
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Health_status", 
                       fill = "Health_status",
                       shape = NULL,
                       filter_species = "Streptococcus_parasanguinis",
                       filter_feature = diff_signif %>% filter(Oral_Site == "TONGUE_BIOFILM") %>% pull(Feature),
                       facet_formula = " . ~ Oral_Site ",
                       export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot

pwy %>%
  # dplyr::filter(Health_status == "Healthy control",
     # dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
   dplyr::filter(Oral_Site %in% c("SALIVA")) %>% 
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Health_status", 
                       fill = "Health_status",
                       shape = NULL,
                       filter_species = "Streptococcus_parasanguinis",
                       filter_feature = diff_signif %>% filter(Oral_Site == "SALIVA") %>% pull(Feature),
                       facet_formula = " . ~ Oral_Site ",
                       export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot

See from streptococci point of view.

First have a look at the top Genus

objects$metpahlan_ps$physeq %>%
  subset_samples(Health_status == "Healthy control") %>%
  subset_samples(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) -> tmp

tmp %>%
  physeq_most_abundant(physeq = .,
                     group_var = "Oral_Site",
                     ntax = 3,
                     tax_level = "Genus") -> ma_saliva_tongue_g

ma_saliva_tongue_g
## [1] "Neisseria"     "Rothia"        "Streptococcus" "Veillonella"

Heatmap those:

tmp %>%
  transform_sample_counts(function(x) x/sum(x) * 100) %>%
  phyloseq::subset_taxa(Genus %in% ma_saliva_tongue_g) %>%
  phyloseq_ampvis_heatmap(transform = FALSE,
                          group_by = "Subject",#treatment
                          facet_by = c("Oral_Site", "Health_status", "Subject"),
                          ntax = 100,
                          tax_aggregate = "Genus",
                          tax_add = NULL) -> p
## Warning: There are only 4 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 25, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 25,50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.1))  + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) + 
  theme(axis.text.y = element_text(angle = 0,  size = 8))

strep. species?

objects$metpahlan_ps$physeq %>%
  subset_samples(Health_status == "Healthy control") %>%
  subset_samples(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) -> tmp

tmp %>%
  subset_taxa(Genus %in% c("Streptococcus")) %>%
  physeq_most_abundant(physeq = .,
                     group_var = "Oral_Site",
                     ntax = 5,
                     tax_level = "Species") -> ma_saliva_tongue_strep

ma_saliva_tongue_strep
## [1] "Streptococcus_australis"     "Streptococcus_infantis"     
## [3] "Streptococcus_mitis"         "Streptococcus_parasanguinis"
## [5] "Streptococcus_salivarius"    "Streptococcus_sanguinis"    
## [7] "Streptococcus_sp_A12"
tmp %>%
  transform_sample_counts(function(x) x/sum(x) * 100) %>%
  phyloseq::subset_taxa(Species %in% ma_saliva_tongue_strep) %>%
  phyloseq_ampvis_heatmap(transform = FALSE,
                          group_by = "Subject",#treatment
                          facet_by = c("Oral_Site", "Health_status", "Subject"),
                          ntax = 100,
                          tax_aggregate = "Species",
                          tax_add = NULL) -> p
## Warning: There are only 7 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") + 
  scale_fill_viridis_c(breaks = c(0,  0.01, 1, 10, 25, 50, 75, 100), 
                       labels = c(0,  0.01, 1, 10, 25,50, 75, 100), 
                       trans = scales::pseudo_log_trans(sigma = 0.1))  + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) + 
  theme(axis.text.y = element_text(angle = 0,  size = 8))

let’s focus on the three most abundant:

strep_sel <- c("Streptococcus_infantis", "Streptococcus_parasanguinis", "Streptococcus_salivarius")
pwy %>%
  dplyr::filter(Health_status == "Healthy control",
                Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  # filter(RNA_DNA > mean(RNA_DNA, na.rm = TRUE)) %>%
  # dplyr::filter(grepl("ose",name)) %>%
  humann2_RNA_DNA_ratio_plot(x_plot = "Species", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = strep_sel,
                       facet_formula = ". ~ .",
                       export_legend = TRUE) -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot + ggpubr::rotate()

Perform stats

plot$plot$data %>% 
  ggpubr::compare_means(RNA_DNA ~ Oral_Site,
                      group.by = c("Species"),
                      data = .,
                      method = "wilcox.test",
                      p.adjust.method = "fdr") %>%
  filter(p.adj < 0.05)  %>%
  DT::datatable()

Both Streptococcus_parasanguinis and Streptococcus_infantis overall display differential transicpromic activities among sites.

pwy %>%
  dplyr::filter(Health_status == "Healthy control",
                Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = strep_sel,
                       facet_formula = " . ~ Health_status + Species",
                       export_legend = TRUE) -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot

We could pintpoint pathways of interests here. -test for significant diff + health vs perio at heach sites -stay at pathway level before going deeper.

plot$plot$data %>% 
  # mutate(log2RNA_DNA = log2(RNA_DNA)) %>%
  ggpubr::compare_means(RNA_DNA ~  Oral_Site,
                      group.by = c("Species","Feature"),
                      data = .,
                      # method = "wilcox.test",
                      p.adjust.method = "fdr") %>%
  filter(p.adj < 0.05) -> diff_signif  

diff_signif %>%
  DT::datatable()
pwy %>%
  dplyr::filter(Health_status == "Healthy control") %>%
     dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
   # dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>% 
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = "Streptococcus_infantis",
                       filter_feature = diff_signif %>% filter(Species == "Streptococcus_infantis") %>% pull(Feature),
                       facet_formula = " . ~ Health_status ",
                       export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot

pwy %>%
  dplyr::filter(Health_status == "Healthy control") %>%
     dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
   # dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>% 
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = "Streptococcus_parasanguinis",
                       filter_feature = diff_signif %>% filter(Species == "Streptococcus_parasanguinis") %>% pull(Feature),
                       facet_formula = " . ~ Health_status ",
                       export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot

Streptococcus_salivarius

pwy %>%
  dplyr::filter(Health_status == "Healthy control") %>%
     dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
   # dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>% 
  humann2_RNA_DNA_ratio_plot(x_plot = "name", 
                       y_plot = "log2(RNA_DNA)", 
                       color = "Oral_Site", 
                       fill = "Oral_Site",
                       shape = NULL,
                       filter_species = "Streptococcus_salivarius",
                       filter_feature = diff_signif %>% filter(Species == "Streptococcus_salivarius") %>% pull(Feature),
                       facet_formula = " . ~ Health_status ",
                       export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend

plot$plot